The models that are used here to derive the posterior samples were selected using rscripts loo comparison.
Load all required packages for analysis.
library(tidyverse) # for data manipulation
library(pmthemes) # for ggplot themes
library(knitr) # for tables
library(sf)
## Warning: package 'sf' was built under R version 4.1.2
library(ggthemes)
library(viridis)
library(scales)
library(lubridate)
library(brms)
library(arsenal)
## Warning: package 'arsenal' was built under R version 4.1.2
library(knitr)
library(here)
## Warning: package 'here' was built under R version 4.1.1
library(spdep)
## Warning: package 'sp' was built under R version 4.1.2
library(broom)
library(RANN)
library(forcats)
library(cowplot)
library(bayesplot)
library(psych)
library(usethis)
Import data required for the analysis.
dat <- readRDS(here::here("data/dat.rds"))
dat <- sf::st_set_geometry(dat, NULL)
prev_model_rintc_31 <- readRDS(here::here("data/prev_model_rintc_31.rds"))
notif_model_rintc_25 <- readRDS(here::here("data/notif_model_rintc_25.rds"))
summary(prev_model_rintc_31)
## Family: zero_inflated_poisson
## Links: mu = log; zi = identity
## Formula: n_prev_tbcases ~ 1 + scale_prop_adults_mean + (1 | cluster) + offset(log(tent_cxr_total))
## Data: dat_scale (Number of observations: 72)
## Samples: 3 chains, each with iter = 15000; warmup = 1000; thin = 1;
## total post-warmup samples = 42000
##
## Group-Level Effects:
## ~cluster (Number of levels: 72)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.33 0.24 0.01 0.90 1.00 14078 16689
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept -6.07 0.28 -6.63 -5.51 1.00 26216
## scale_prop_adults_mean -0.06 0.08 -0.22 0.09 1.00 49468
## Tail_ESS
## Intercept 28098
## scale_prop_adults_mean 28972
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## zi 0.18 0.13 0.01 0.46 1.00 27370 19772
##
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
plot(prev_model_rintc_31)
# Prevalence model
# extract the posterior samples from the prevalence model
# Remember need to use centred variables - describe clearly in the model code how you have centred.
post_tb_prev_m31 <- posterior_samples(prev_model_rintc_31)
# add the posterior intercepts to the random effects for each cluster
# mutate(across(contains("r_cluster"), ~((zi + (1-zi))*(exp(b_intercept + .x))))) %>%
prev_m_sum <- post_tb_prev_m31 %>%
janitor::clean_names() %>%
select(b_intercept, zi, contains("r_cluster")) %>%
mutate(across(contains("r_cluster"), ~ (((1 - zi)) * (exp(b_intercept + .x))))) %>%
select(-b_intercept, -zi) %>%
rename_with(.fn = ~ stringr::str_replace_all(., "r_cluster", "p_prev")) %>%
rename_with(.fn = ~ stringr::str_replace_all(., "_intercept", ""))
# have a look at the table
cluster_prevtotals <- select(dat, cluster, tent_cxr_total) %>%
pivot_wider(names_from = cluster, values_from = tent_cxr_total) %>%
rename_with(.fn = ~ stringr::str_replace_all(., "c", "p_prev_"))
# multiply by the cluster totals, to get the prevalent cases per cluster
# prev_m_sum <- prev_m_sum %>%
# mutate(across(everything(), ~.x * cluster_prevtotals[[cur_column()]]))
# prev_m_sum
prev_m_sum <- map2_df(prev_m_sum, cluster_prevtotals, ~ (.x * .y))
# posterior density plots for all clusters
# This gets the original prevalent cases per cluster from the study to compare with the model ones
cluster2019prev <- select(dat, cluster, n_prev_tbcases) %>%
mutate(name = stringr::str_replace_all(cluster, "c", "p_prev_")) %>%
mutate(name = fct_inorder(name))
prev_m_sum <- prev_m_sum %>%
rename_all(.funs = funs(str_replace(., "c", "")))
## Warning: `funs()` was deprecated in dplyr 0.8.0.
## Please use a list of either functions or lambdas:
##
## # Simple named list:
## list(mean = mean, median = median)
##
## # Auto named with `tibble::lst()`:
## tibble::lst(mean, median)
##
## # Using lambdas
## list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
prev_m_sum <- prev_m_sum %>%
select(num_range("p_prev_", range = 1:72))
prev_m_sum %>%
pivot_longer(p_prev_1:p_prev_20) %>%
select(name, value) %>%
mutate(name = fct_inorder(name)) %>%
ggplot() +
geom_density(aes(x = value)) +
geom_vline(
data = cluster2019prev[1:20, ],
aes(xintercept = n_prev_tbcases), color = "red"
) +
facet_wrap(name ~ ., scales = "free") +
theme_minimal()
prev_m_sum %>%
pivot_longer(p_prev_1:p_prev_20) %>%
select(name, value) %>%
mutate(name = fct_inorder(name)) %>%
ggplot() +
geom_boxplot(aes(y = value, x = name)) +
geom_hline(
data = cluster2019prev[1:20, ],
aes(yintercept = n_prev_tbcases),
color = "red"
) +
facet_wrap(name ~ ., scales = "free") +
theme_minimal()
prev_m_sum %>%
pivot_longer(p_prev_21:p_prev_41) %>%
select(name, value) %>%
mutate(name = fct_inorder(name)) %>%
ggplot() +
geom_density(aes(x = value)) +
geom_vline(
data = cluster2019prev[21:41, ], aes(xintercept = n_prev_tbcases),
color = "red"
) +
facet_wrap(name ~ ., scales = "free") +
theme_minimal()
prev_m_sum %>%
pivot_longer(p_prev_21:p_prev_41) %>%
select(name, value) %>%
mutate(name = fct_inorder(name)) %>%
ggplot() +
geom_boxplot(aes(y = value, x = name)) +
geom_hline(
data = cluster2019prev[21:41, ],
aes(yintercept = n_prev_tbcases), color = "red"
) +
facet_wrap(name ~ ., scales = "free") +
theme_minimal()
prev_m_sum %>%
pivot_longer(p_prev_42:p_prev_62) %>%
select(name, value) %>%
mutate(name = fct_inorder(name)) %>%
ggplot() +
geom_density(aes(x = value)) +
geom_vline(
data = cluster2019prev[42:62, ], aes(xintercept = n_prev_tbcases),
color = "red"
) +
facet_wrap(name ~ ., scales = "free") +
theme_minimal()
prev_m_sum %>%
pivot_longer(p_prev_42:p_prev_62) %>%
select(name, value) %>%
mutate(name = fct_inorder(name)) %>%
ggplot() +
geom_boxplot(aes(y = value, x = name)) +
geom_hline(
data = cluster2019prev[42:62, ],
aes(yintercept = n_prev_tbcases), color = "red"
) +
facet_wrap(name ~ ., scales = "free") +
theme_minimal()
prev_m_sum %>%
pivot_longer(p_prev_63:p_prev_72) %>%
select(name, value) %>%
mutate(name = fct_inorder(name)) %>%
ggplot() +
geom_density(aes(x = value)) +
geom_vline(
data = cluster2019prev[63:72, ], aes(xintercept = n_prev_tbcases),
color = "red"
) +
facet_wrap(name ~ ., scales = "free") +
theme_minimal()
prev_m_sum %>%
pivot_longer(p_prev_63:p_prev_72) %>%
select(name, value) %>%
mutate(name = fct_inorder(name)) %>%
ggplot() +
geom_boxplot(aes(y = value, x = name)) +
geom_hline(
data = cluster2019prev[63:72, ],
aes(yintercept = n_prev_tbcases), color = "red"
) +
facet_wrap(name ~ ., scales = "free") +
theme_minimal()
summary(notif_model_rintc_25)
## Family: poisson
## Links: mu = log
## Formula: total_confirmed ~ 1 + scale_prop_adults_mean + scale_perc_never_primary_mean + scale_clinic_distance_1km + tb_year + (1 | cluster) + offset(log(population))
## Data: dat_scale_ln (Number of observations: 360)
## Samples: 3 chains, each with iter = 15000; warmup = 1000; thin = 1;
## total post-warmup samples = 42000
##
## Group-Level Effects:
## ~cluster (Number of levels: 72)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.31 0.04 0.24 0.39 1.00 15832 25699
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat
## Intercept -7.58 0.08 -7.75 -7.42 1.00
## scale_prop_adults_mean -0.04 0.02 -0.07 0.00 1.00
## scale_perc_never_primary_mean -0.02 0.01 -0.04 -0.01 1.00
## scale_clinic_distance_1km -0.25 0.06 -0.37 -0.13 1.00
## tb_year2015 1.06 0.08 0.91 1.21 1.00
## tb_year2016 1.07 0.08 0.92 1.22 1.00
## tb_year2017 0.92 0.08 0.77 1.07 1.00
## tb_year2018 0.20 0.09 0.03 0.37 1.00
## Bulk_ESS Tail_ESS
## Intercept 18133 25410
## scale_prop_adults_mean 16997 24775
## scale_perc_never_primary_mean 18218 26704
## scale_clinic_distance_1km 17552 25411
## tb_year2015 25266 30939
## tb_year2016 25594 30585
## tb_year2017 25767 30568
## tb_year2018 28496 31935
##
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
plot(notif_model_rintc_25)
# now do the same for the notification data
post_tb_notifs_m25 <- posterior_samples(notif_model_rintc_25)
# add the posterior intercepts to the random effects for each cluster
notif_m_sum <- post_tb_notifs_m25 %>%
janitor::clean_names() %>%
select(b_intercept, contains("r_cluster")) %>%
mutate(across(contains("r_cluster"), ~ .x + b_intercept)) %>%
select(-b_intercept) %>%
rename_with(.fn = ~ stringr::str_replace_all(., "r_cluster", "p_notif")) %>%
rename_with(.fn = ~ stringr::str_replace_all(., "_intercept", "")) %>%
mutate(across(everything(), ~ exp(.x)))
# have a look at the table
# Get the cluster totals
cluster_totals <- select(dat, cluster, total) %>%
pivot_wider(names_from = cluster, values_from = total) %>%
rename_with(.fn = ~ stringr::str_replace_all(., "c", "p_notif_"))
# convert to notifications per cluster
# notif_m_sum <- notif_m_sum %>%
# mutate(across(everything(), ~.x * cluster_totals[[cur_column()]]))
notif_m_sum <- map2_df(notif_m_sum, cluster_totals, ~ (.x * .y))
notif_m_sum